load("~/Documents/ISU/23Fall/STAT579/final project/data/border_clean.Rdata")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
#ggplotly()
#read data
# Keep only the US-Canada crossings and store Year as a factor whose levels
# cover every year from 1996 through 2023, in ascending order.
Canada_border <- border %>%
  filter(Border == "US-Canada Border") %>%
  mutate(Year = factor(Year, levels = 1996:2023))

#Port code: How many ports have a complete set of records? How many ports should we consider? Does Port_name change over time? There are 118 factor levels, but I only have 90 rows, and only 89 when I group_by Port_name.

#Eastport

# Number of rows recorded under each Port_code for the port named "Eastport".
Canada_border %>%
  filter(Port_name == "Eastport") %>%
  count(Port_code, name = "n")
## # A tibble: 2 × 2
##   Port_code     n
##   <fct>     <int>
## 1 0103       2558
## 2 3302       3896

Eastport has two distinct Port_code values (0103 and 3302).

# Scatter of Value against Year for the rows named "Eastport", coloured by
# Port_code so the two codes can be compared.
# The former group_by(Port_code) was removed: ggplot2 derives grouping from
# aesthetics (here `colour`), not from dplyr groups, so it had no effect.
Canada_border %>%
  filter(Port_name == "Eastport") %>%
  ggplot(aes(x = Year, y = Value, colour = Port_code)) +
  geom_point()

State

# Value over Date with one panel per State, drawn twice: first with a shared
# y-axis, then with an independent y-axis per panel (scales = "free_y").
# The former group_by(State) was removed: ggplot2 does not use dplyr grouping;
# facet_wrap(~State) is what splits the data by State.
# NOTE(review): no `group` aesthetic is set, so geom_line() joins every row of
# a panel into a single path ordered by Date -- confirm that is intended.
Canada_border %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~State)

Canada_border %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~State, scales = "free_y")

#Measure

# Value over Date with one panel per Measure, drawn twice: first with a shared
# y-axis, then with an independent y-axis per panel (scales = "free_y").
# The former group_by(Measure) was removed: ggplot2 does not use dplyr
# grouping; facet_wrap(~Measure) is what splits the data by Measure.
# NOTE(review): no `group` aesthetic is set, so geom_line() joins every row of
# a panel into a single path ordered by Date -- confirm that is intended.
Canada_border %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~Measure)

Canada_border %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~Measure, scales = "free_y")

We can see some seasonal patterns in each measure.

How about grouping by State for a specific measure?

#Bus

# Rows with Measure == "Buses" only: Value over Date, one panel per State,
# first with a shared y-axis and then with per-panel y-axes.
# The former group_by(State) was removed: ggplot2 does not use dplyr grouping;
# facet_wrap(~State) is what splits the data by State.
Canada_border %>%
  filter(Measure == "Buses") %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~State)

Canada_border %>%
  filter(Measure == "Buses") %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~State, scales = "free_y")

#Personal Vehicles

# Rows with Measure == "Personal Vehicles" only: Value over Date, one panel
# per State, first with a shared y-axis and then with per-panel y-axes.
# The former group_by(State) was removed: ggplot2 does not use dplyr grouping;
# facet_wrap(~State) is what splits the data by State.
Canada_border %>%
  filter(Measure == "Personal Vehicles") %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~State)

Canada_border %>%
  filter(Measure == "Personal Vehicles") %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~State, scales = "free_y")

#Personal Vehicle Passengers

# Rows with Measure == "Personal Vehicle Passengers" only: Value over Date,
# one panel per State, first with a shared y-axis and then with per-panel
# y-axes.
# The former group_by(State) was removed: ggplot2 does not use dplyr grouping;
# facet_wrap(~State) is what splits the data by State.
Canada_border %>%
  filter(Measure == "Personal Vehicle Passengers") %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~State)

Canada_border %>%
  filter(Measure == "Personal Vehicle Passengers") %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_wrap(~State, scales = "free_y")

I think Personal Vehicles is a major contributing factor to the overall pattern.

# Build the per-State line plot (Value over Date, independent y-axis per
# panel) for whatever subset of the border data is passed in.
# Replaces four copies of the same pipeline; the group_by(State) those copies
# carried was dropped because ggplot2 does not use dplyr grouping.
plot_value_by_state <- function(data) {
  ggplot(data, aes(x = Date, y = Value)) +
    geom_line() +
    facet_wrap(~State, scales = "free_y")
}

# All measures together.
Among_State <- plot_value_by_state(Canada_border)

# One plot per measure of interest.
Bus_Among_State <- Canada_border %>%
  filter(Measure == "Buses") %>%
  plot_value_by_state()

Personal_Vehicles_Among_State <- Canada_border %>%
  filter(Measure == "Personal Vehicles") %>%
  plot_value_by_state()

Personal_Vehicles_Passengers_Among_State <- Canada_border %>%
  filter(Measure == "Personal Vehicle Passengers") %>%
  plot_value_by_state()

# Stack the all-measures plot above each single-measure plot so their
# per-State shapes can be compared.
grid.arrange(Among_State, Bus_Among_State, nrow = 2)

grid.arrange(Among_State, Personal_Vehicles_Among_State, nrow = 2)

grid.arrange(Among_State, Personal_Vehicles_Passengers_Among_State, nrow = 2)

#Consider average in each month

# Mean Value for every Year/Month combination (missing values ignored),
# drawn as one line per Year across the months.
Canada_border %>%
  group_by(Year, Month) %>%
  summarise(
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    average_value = mean(Value, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = factor(Month), y = average_value, group = Year, colour = Year)) +
  geom_line()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.

# Mean Value for every Year/Month combination (missing values ignored),
# shown as small multiples: one panel and one line per Year.
Canada_border %>%
  group_by(Year, Month) %>%
  summarise(
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    average_value = mean(Value, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = factor(Month), y = average_value, colour = Year)) +
  geom_line(aes(group = Year)) +
  facet_wrap(~Year) +
  # Hide the x-axis title, labels and ticks in every panel.
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.